#!/usr/bin/env python
# -*- coding: utf-8 -*-
# gmi-backup.py: make generation-managed incremental backups using the rsync
#                command.
# by knono549
# $Id$
#
# Copyright 2012 knono549 <knono549@gmail.com>
#
# This file is part of gmi-backup
#
# gmi-backup is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 2 of the License, or (at your option)
# any later version.
#
# gmi-backup is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# gmi-backup.  If not, see <http://www.gnu.org/licenses/>.
#

import datetime
import errno
import getopt
import inspect
import os
import re
import shutil
import stat
import subprocess
import sys
import traceback

## root error
# Base class of the errors that gmi-backup raises on its own.
class GMIError(Exception):
    """Root of the gmi-backup exception hierarchy."""

## file system related error
# Raised when a file system operation (stat, mkdir, remove, ...) fails.
class FSError(GMIError):
    """A file system operation failed."""

## external command related error
# Raised when the rsync command cannot be run, fails, or its output cannot
# be handled.
class ECError(GMIError):
    """Running an external command failed."""

##
# A path relative to a backup root.
#
# Entries are ordered by path, except that '.' (the root itself) sorts before
# every other path.  Two entries are equal when their paths are equal.
class Entry(object):
    ## constructor
    # @param path str
    def __init__(self, path):
        ## str
        self.path = path

    ##
    # Three-way comparison.
    # @param other Entry
    # @return int negative, zero or positive
    def __cmp__(self, other):
        if self.path == other.path: return 0
        if self.path == '.': return -1
        if other.path == '.': return 1
        # same result as cmp(a, b), without needing the cmp() builtin
        return (self.path > other.path) - (self.path < other.path)

    # Rich comparisons, all derived from __cmp__ so that every operator
    # agrees with it (and with __hash__).  They make sorting and equality
    # work where only rich comparisons are consulted.

    ##
    # @param other Entry
    # @return bool
    def __eq__(self, other):
        if not isinstance(other, Entry): return NotImplemented
        return self.__cmp__(other) == 0

    ##
    # @param other Entry
    # @return bool
    def __ne__(self, other):
        if not isinstance(other, Entry): return NotImplemented
        return self.__cmp__(other) != 0

    ##
    # @param other Entry
    # @return bool
    def __lt__(self, other):
        if not isinstance(other, Entry): return NotImplemented
        return self.__cmp__(other) < 0

    ##
    # @param other Entry
    # @return bool
    def __le__(self, other):
        if not isinstance(other, Entry): return NotImplemented
        return self.__cmp__(other) <= 0

    ##
    # @param other Entry
    # @return bool
    def __gt__(self, other):
        if not isinstance(other, Entry): return NotImplemented
        return self.__cmp__(other) > 0

    ##
    # @param other Entry
    # @return bool
    def __ge__(self, other):
        if not isinstance(other, Entry): return NotImplemented
        return self.__cmp__(other) >= 0

    ##
    # Equal entries have equal paths, so the path's hash is used.
    # @return int
    def __hash__(self):
        return hash(self.path)

##
# Entry carrying a mark that tells what has to be done with its path.
class MEntry(Entry):
    # mark values
    MOD_OR_CRE = 0  # changed, not yet resolved to MODIFY or CREATE
    MODIFY = 1
    CREATE = 2
    DELETE = 3
    IGNORE = 4

    ## constructor
    # @param path str
    # @param mark int one of the mark values above
    def __init__(self, path, mark):
        Entry.__init__(self, path)

        ## int
        self.mark = mark

##
# Entry bundled with the stat information of its path.
class SEntry(Entry):
    ## constructor
    # @param path str
    # @param st stat_result
    def __init__(self, path, st):
        Entry.__init__(self, path)

        ## stat_result
        self.stat = st

##
# Write the command line usage to standard output.
#
# Every option accepted by the argument parser is listed, including --help.
# @return None
def print_usage():
    sys.stdout.write("""usage: python gmi-backup.py [option] src dest

  - option
    -g num
     number of older generation directories to be kept
    --help
     print this usage
    --version
     print the version

  - src
    source directory

  - dest
    destination directory
""")
    return

##
# Extract the value of a version control keyword.
# @param raw str keyword text, either unexpanded ('$Name$') or expanded
#                ('$Name: value $')
# @param name str keyword name without the dollar signs
# @param placeholder str returned when raw carries no value
# @return str
def _keyword_value(raw, name, placeholder):
    # Built by concatenation so that keyword expansion can never rewrite
    # the prefix itself.
    prefix = '$' + name + ': '
    suffix = ' $'
    if raw.startswith(prefix) and raw.endswith(suffix):
        value = raw[len(prefix):-len(suffix)]
        if value: return value
    return placeholder

##
# Write the program version, revision and date to standard output.
#
# Revision and date come from version control keywords; question marks are
# printed for a keyword that has not been expanded.
# @return None
def print_version():
    ver = '0.1'
    rev = _keyword_value('$Revision$', 'Revision', '????????????')
    date = _keyword_value('$Date$', 'Date', '????/??/?? ??:??:??')
    sys.stdout.write('gmi-backup.py %s (%s %s)\n' % (ver, rev, date))
    return

##
# Write an error message to standard error, followed by the file name, line
# number and function name of the caller.
# @param msg str
# @return None
def print_error(msg):
    sys.stderr.write(msg)

    # currentframe() is the frame of this function; its f_back is the caller.
    frame = inspect.currentframe()
    if frame is not None:
        frame = frame.f_back
    if frame is None:
        # no caller information available: still terminate the line
        sys.stderr.write("\n")
        return

    code = frame.f_code
    sys.stderr.write(" (File \"%s\", line %d, in %s)\n" %
                       (code.co_filename, frame.f_lineno, code.co_name))
    return

##
# Write a message to standard output and flush it.
#
# Messages are often written without a trailing newline right before a long
# operation; flushing makes them visible before that operation starts.
# @param msg str
# @return None
def write(msg):
    sys.stdout.write(msg)
    sys.stdout.flush()
    return

##
# List the ancestor directories of a path, nearest ancestor first.
#
# The path is normalized first.  The walk stops when the directory name
# becomes empty (relative path) or no longer changes (root reached).
# @param path str
# @return list<str>
def get_parent_directories(path):
    parents = []

    current = os.path.normpath(path)
    while True:
        parent = os.path.dirname(current)
        if parent == '' or parent == current:
            break
        parents.append(parent)
        current = parent

    return parents

##
# Create every missing ancestor directory of a path, outermost first.
#
# The path itself is not created.
# @param path str
# @return None
# @throws FSError when an ancestor cannot be created and is not already a
#                 directory
def make_parent_directories(path):
    parents = get_parent_directories(path)
    parents.reverse() #outermost first

    for parent in parents:
        try:
            os.mkdir(parent)
        except OSError:
            # Judge by the result, not by errno: an existing directory is
            # fine whatever error mkdir reports for it, while something
            # else occupying the name can never serve as a parent.
            if not os.path.isdir(parent):
                raise FSError('Could not create a directory: ' + parent)

    return

##
# Make sure a directory exists at path and return its stat information.
#
# An existing directory is used as it is; when nothing exists at path the
# directory is created (its parent must exist).
# @param path str
# @return stat_result
# @throws FSError when path exists but is not a directory, or when it can
#                 not be inspected or created
def make_directory_and_get_stat(path):
    try:
        info = os.stat(path)
    except OSError:
        # only "does not exist" leads to creating the directory below
        if sys.exc_info()[1].errno != errno.ENOENT:
            raise FSError('Could not get stat info of directory: ' + path)
    else:
        if stat.S_ISDIR(info.st_mode):
            return info
        raise FSError('Could not create a directory: ' + path)

    try:
        os.mkdir(path)
    except OSError:
        raise FSError('Could not create a directory: ' + path)

    try:
        return os.stat(path)
    except OSError:
        raise FSError('Could not get stat info of directory: ' + path)

##
# Create a new older generation directory named 'YYYY-MM-DD-NN' under
# parent, where the date is today and NN is the lowest number from 00 to 99
# whose name is not taken yet.
#
# The directory is created directly and the next number is tried when the
# name already exists, so a name taken in the meantime (or by a dangling
# symlink) is skipped instead of causing a failure.
# @param parent str
# @return str path of the created directory
# @throws FSError when the directory cannot be created or all 100 names of
#                 today are taken
def make_older_generation_directory(parent=''):
    date = datetime.date.today().strftime('%Y-%m-%d')
    candidate = None

    for rev in range(100):
        candidate = os.path.join(parent, '%s-%02d' % (date, rev))
        try:
            os.mkdir(candidate)
        except OSError:
            if sys.exc_info()[1].errno == errno.EEXIST:
                continue #name is taken, try the next number
            raise FSError('Could not create a directory: ' + candidate)
        return candidate

    # every number of today is in use
    raise FSError('Could not create a directory: ' + candidate)

##
# Remove a file, a symlink, or a directory together with all its contents.
#
# The tree is walked depth first without recursion.  Symlinks are removed
# themselves and never followed.  Anything that is not a regular file, a
# symlink or a directory aborts the removal.
# @param path str
# @return None
# @throws FSError when an entry cannot be inspected, listed or removed, or
#                 has an unsupported type
def remove_files_recursively(path):
    # Directories that are being emptied, innermost last.  Each item is a
    # pair (directory path, names still to be removed); the names are
    # reversed so that pop() hands them out in os.listdir order.
    open_dirs = []

    target = path
    while target is not None:
        try:
            mode = os.lstat(target).st_mode
        except OSError:
            raise FSError('Could not get stat info of file: ' + target)

        if stat.S_ISLNK(mode) or stat.S_ISREG(mode):
            try:
                os.remove(target)
            except OSError:
                raise FSError('Could not remove a file: ' + target)
        elif stat.S_ISDIR(mode):
            try:
                names = os.listdir(target)
            except OSError:
                raise FSError('Could not get entry list: ' + target)
            names.reverse()
            open_dirs.append((target, names))
        else:
            raise FSError('Unsupported type file: ' + target)

        # pick the next entry, removing every directory that became empty
        target = None
        while open_dirs:
            (dir_path, names) = open_dirs[-1]
            if names:
                target = os.path.join(dir_path, names.pop())
                break
            try:
                os.rmdir(dir_path)
            except OSError:
                raise FSError('Could not remove a directory: ' + dir_path)
            open_dirs.pop()

    return

##
# Turn the lines printed by the rsync dry run into entries.
#
# Expected layout: the line 'building file list ... done', then any number
# of 'deleting <path>' lines, then './' followed by the changed paths, and
# an empty line that ends the list.  Lines starting with 'skipping ' are
# ignored.
# @param lines list<str>
# @return list<MEntry> DELETE entries for deleted paths, MOD_OR_CRE entries
#                      for './' (as '.') and for the changed paths
# @throws ECError when the lines do not follow the expected layout
def _parse_dry_run_output(lines):
    HEADER = 0
    DELETIONS = 1
    CHANGES = 2

    entries = []
    state = HEADER

    for line in lines:
        line = line.rstrip("\r\n")

        if state == HEADER:
            if line != 'building file list ... done':
                raise ECError('Could not parse output from command')
            state = DELETIONS
            continue

        if state == DELETIONS:
            if line.startswith('deleting '):
                entries.append(MEntry(os.path.normpath(line[9:]),
                                      MEntry.DELETE))
                continue
            if line != './':
                raise ECError('Could not parse output from command')
            # './' opens the list of changes and is its first item as well,
            # so it is handled once more below
            state = CHANGES

        if line.startswith('skipping '):
            continue
        if line == '':
            break #end of the list
        if line == './':
            entries.append(MEntry('.', MEntry.MOD_OR_CRE))
        else:
            entries.append(MEntry(os.path.normpath(line),
                                  MEntry.MOD_OR_CRE))

    return entries

##
# Find out what differs between src and dest by running rsync in dry run
# mode and parsing what it prints.  Nothing is changed on disk.
#
# Both output pipes are drained with communicate() while the command runs;
# waiting for the exit first could block forever once rsync has filled a
# pipe that nobody reads.
# @param src str
# @param dest str
# @return list<MEntry> entries marked DELETE or MOD_OR_CRE
# @throws ECError when rsync cannot be started, exits with a non-zero code,
#                 or prints something unexpected
def check_different_entries(src, dest):
    # execute rsync with dry run mode, and parse output from command
    src_arg = os.path.join(src, '') #ends with path separator

    try:
        pipe = subprocess.Popen(['rsync',
                                 '-nrptgov',
                                 '--delete-before', #to detect deleting files
                                 src_arg,
                                 dest],
                                stdin=None,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    except OSError:
        raise ECError('Command execution failed')

    try:
        (out, err) = pipe.communicate()
    except (IOError, OSError):
        raise ECError('Could not parse output from command')

    if pipe.returncode != 0:
        msg = 'Command failed with exit code ' + str(pipe.returncode) + ":\n"
        msg += ''.join(['  ' + line for line in err.splitlines(True)])
        raise ECError(msg)

    # split on "\n" only; the piece after the final newline is not a line
    lines = out.split("\n")
    if lines[-1] == '':
        lines.pop()

    return _parse_dry_run_output(lines)

##
# Resolve the mark of every entry and collect stat information of the files
# under dest_dir.
#
# MOD_OR_CRE entries become CREATE (path missing under dest_dir), IGNORE
# (directory with the same mtime on both sides) or MODIFY; '.' always
# becomes MODIFY.  One line per entry ('M', 'C', 'I' or 'D' plus the path)
# is written as progress.  For MODIFY and DELETE entries the stat
# information of their parent directories is cached too.
# @param entries list<MEntry> marks are updated in place
# @param src_dir str
# @param dest_dir str
# @return map<str, stat_result> lstat results of files under dest_dir,
#                               keyed by relative path
# @throws FSError when stat information cannot be obtained
# @throws GMIError when an entry has an unknown mark
def update_mark_and_create_stat_cache(entries,
                                      src_dir,
                                      dest_dir):
    cache = {}

    for entry in entries:
        rel = entry.path

        if rel == '.':
            entry.mark = entry.MODIFY
            write('M ' + rel + "\n")
            continue

        if entry.mark == entry.MOD_OR_CRE: #modifying or creating
            info = cache.get(rel)
            if not info:
                target = os.path.join(dest_dir, rel)
                try:
                    info = os.lstat(target)
                except OSError:
                    if sys.exc_info()[1].errno != errno.ENOENT:
                        raise FSError('Could not get stat info of file: ' +
                                        target)
                    # not in the destination yet
                    entry.mark = entry.CREATE
                    write('C ' + rel + "\n")
                    continue
                cache[rel] = info

            # remove the entry which is incorrectly detected
            # FIXME: when both directories have different size (? e.g. cifs),
            #        rsync always detect a change of directory
            same_mtime = False
            if stat.S_ISDIR(info.st_mode):
                origin = os.path.join(src_dir, rel)
                try:
                    origin_info = os.lstat(origin)
                except OSError:
                    raise FSError('Could not get stat info of file: ' +
                                    origin)
                same_mtime = (info.st_mtime == origin_info.st_mtime)

            if same_mtime:
                entry.mark = entry.IGNORE
                write('I ' + rel + "\n")
                continue

            entry.mark = entry.MODIFY
            write('M ' + rel + "\n")
        elif entry.mark == entry.DELETE: #deleting
            if not cache.get(rel):
                target = os.path.join(dest_dir, rel)
                try:
                    cache[rel] = os.lstat(target)
                except OSError:
                    raise FSError('Could not get stat info of file: ' +
                                    target)
            write('D ' + rel + "\n")
        else:
            raise GMIError('Unknown type mark ' + str(entry.mark) +
                             ': ' + rel)

        # only MODIFY and DELETE entries get here
        for parent in get_parent_directories(rel):
            if cache.get(parent):
                continue
            target = os.path.join(dest_dir, parent)
            try:
                cache[parent] = os.lstat(target)
            except OSError:
                raise FSError('Could not get stat info of directory: ' +
                                target)

    return cache

##
# Set access and modification time of a path from stat information.
# @param path str
# @param st stat_result
# @return None
# @throws FSError when the times cannot be set
def _set_times(path, st):
    try:
        os.utime(path, (st.st_atime, st.st_mtime))
    except OSError:
        raise FSError('Could not set stat info of file: ' + path)
    return

##
# Create a directory, accepting one that already exists.
# @param path str
# @return None
# @throws FSError when the directory cannot be created
def _make_directory(path):
    try:
        os.mkdir(path)
    except OSError:
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise FSError('Could not create a directory: ' + path)
    return

##
# Store a copy of a modified entry: the symlink is re-created, the regular
# file is copied, the directory is created.
# @param src str path of the entry to be stored
# @param dest str path the entry is stored at
# @param src_st stat_result stat information of src
# @param relpath str path of the entry relative to the directory roots
# @return None
# @throws FSError when the entry cannot be stored
# @throws GMIError when the entry has an unsupported file type
def _store_modified_entry(src, dest, src_st, relpath):
    mode = src_st.st_mode

    if stat.S_ISLNK(mode):
        try:
            os.symlink(os.readlink(src), dest)
        except OSError:
            raise FSError('Could not create a symlink: ' + dest)
    elif stat.S_ISREG(mode):
        try:
            shutil.copy(src, dest)
        except (shutil.Error, IOError, OSError):
            # a failed copy is reported as IOError/OSError, not only as
            # shutil.Error
            raise FSError('Could not copy a file to a directory: ' +
                            relpath)
    elif stat.S_ISDIR(mode):
        _make_directory(dest)
    else:
        raise GMIError('Unsupported type file: ' + src)

    return

##
# Store a deleted entry: the symlink or regular file is moved away, the
# directory is created at dest and removed at src.
# @param src str path of the entry to be stored
# @param dest str path the entry is stored at
# @param src_st stat_result stat information of src
# @param relpath str path of the entry relative to the directory roots
# @return None
# @throws FSError when the entry cannot be stored
# @throws GMIError when the entry has an unsupported file type
def _store_deleted_entry(src, dest, src_st, relpath):
    mode = src_st.st_mode

    if stat.S_ISLNK(mode) or stat.S_ISREG(mode):
        try:
            shutil.move(src, dest)
        except (shutil.Error, IOError, OSError):
            raise FSError('Could not move a file '
                          'to a directory: ' + relpath)
    elif stat.S_ISDIR(mode):
        _make_directory(dest)
        try:
            os.rmdir(src)
        except OSError:
            raise FSError('Could not remove a directory: ' + src)
    else:
        raise GMIError('Unsupported type file: ' + src)

    return

##
# Walk the tree under dest_dir and give every regular file and directory
# the times recorded in stat_cache for the same relative path.
#
# The type of each entry is taken from stat_cache.  A directory gets its
# times after its contents were handled; symlinks are skipped.
# @param dest_dir str
# @param stat_cache map<str, stat_result>
# @return None
# @throws FSError when an entry has no stat information, cannot be listed
#                 or its times cannot be set
# @throws GMIError when an entry has an unsupported file type
def _sync_stored_times(dest_dir, stat_cache):
    # Directories whose contents are being handled, innermost last, as
    # (relative path, path, stat info, names left); the names are reversed
    # so that pop() hands them out in os.listdir order.
    open_dirs = []

    relpath = '.'
    while relpath is not None:
        if relpath == '.':
            path = dest_dir
        else:
            path = os.path.join(dest_dir, relpath)

        relpath_st = stat_cache.get(relpath)
        if not relpath_st:
            raise FSError('Could not find stat info of file: ' + path)
        mode = relpath_st.st_mode

        if stat.S_ISLNK(mode):
            pass #in case of symlink, can not sync a timestamp
        elif stat.S_ISREG(mode):
            _set_times(path, relpath_st)
        elif stat.S_ISDIR(mode):
            try:
                names = os.listdir(path)
            except OSError:
                raise FSError('Could not get entry list: ' + path)
            names.reverse()
            open_dirs.append((relpath, path, relpath_st, names))
        else:
            raise GMIError('Unsupported type file: ' + relpath)

        # pick the next entry, finishing every directory that is done
        relpath = None
        while open_dirs:
            (dir_rel, dir_path, dir_st, names) = open_dirs[-1]
            if names:
                relpath = names.pop()
                if dir_rel != '.':
                    relpath = os.path.join(dir_rel, relpath)
                break
            _set_times(dir_path, dir_st)
            open_dirs.pop()

    return

##
# Store the entries that are about to be modified or deleted from src_dir
# into dest_dir, then fix the timestamps affected by doing so.
#
# MODIFY entries are copied and DELETE entries are moved; CREATE and IGNORE
# entries and '.' are skipped.  Afterwards directories of src_dir that lost
# an entry get their cached times back, and the files and directories under
# dest_dir get the times cached for the same relative paths.
# @param src_dir str
# @param dest_dir str
# @param entries list<MEntry> children are expected before their parent
#                             directory (a deleted directory is removed with
#                             rmdir, so it must be empty by then)
# @param stat_cache map<str, stat_result> stat information keyed by
#                                         relative path
# @return None
# @throws FSError when a file system operation fails or stat information is
#                 missing
# @throws GMIError when an entry has an unknown mark or an unsupported type
def store_entries(src_dir, dest_dir, entries, stat_cache):
    moddirs = set()

    for entry in entries:
        if entry.path == '.': continue
        if entry.mark == entry.CREATE: continue #skip creating
        if entry.mark == entry.IGNORE: continue #ignore
        if entry.mark != entry.MODIFY and entry.mark != entry.DELETE:
            raise GMIError('Unknown type mark ' + str(entry.mark) +
                             ': ' + entry.path)

        src_st = stat_cache.get(entry.path)
        if not src_st:
            raise FSError('Could not find stat info of file: ' +
                             os.path.join(src_dir, entry.path))

        src = os.path.join(src_dir, entry.path)
        dest = os.path.join(dest_dir, entry.path)

        make_parent_directories(dest)

        if entry.mark == entry.MODIFY: #modifying
            _store_modified_entry(src, dest, src_st, entry.path)
        else: #deleting
            # timestamp of a parent directory will be restored later
            src_parent = os.path.dirname(entry.path)
            if src_parent == '':
                src_parent = '.'
            moddirs.add(src_parent)

            _store_deleted_entry(src, dest, src_st, entry.path)

            if stat.S_ISDIR(src_st.st_mode):
                # remove deleted directory from moddirs
                moddirs.discard(entry.path)

    # restore timestamp of directories under the current generation directory
    for moddir in moddirs:
        moddir_st = stat_cache.get(moddir)
        if not moddir_st:
            raise FSError('Could not find stat info of directory: ' + moddir)
        path = os.path.join(src_dir, moddir)
        try:
            os.utime(path, (moddir_st.st_atime, moddir_st.st_mtime))
        except OSError:
            raise FSError('Could not set timestamp of directory: ' + path)

    # sync timestamp of directories under the older generation directory
    _sync_stored_times(dest_dir, stat_cache)

    return

##
# Keep the gen_num newest older generation directories under base_dir and
# remove the others.
#
# Only directories named 'YYYY-MM-DD-NN' take part.  They are ranked by
# modification time; directories with the same modification time are
# ranked by name, so the result does not depend on the order in which the
# directory happens to be listed.
# @param base_dir str
# @param gen_num int number of directories to be kept
# @return None
# @throws FSError when base_dir cannot be listed, or a directory cannot be
#                 inspected or removed
def rotate_older_generation_directories(base_dir, gen_num):
    try:
        names = os.listdir(base_dir)
    except OSError:
        raise FSError('Could not get entry list: ' + base_dir)

    sentries = []
    og_dir_re = re.compile(r"^\d{4}-\d{2}-\d{2}-\d{2}$")
    for name in names:
        if not og_dir_re.match(name): continue
        path = os.path.join(base_dir, name)
        try:
            entry_st = os.stat(path)
        except OSError:
            raise FSError('Could not get stat info of file: ' + path)
        if stat.S_ISDIR(entry_st.st_mode):
            sentries.append(SEntry(path, entry_st))

    # sort sentries in new order, the name decides between equal timestamps
    sentries.sort(key=lambda sentry: (sentry.stat.st_mtime,
                                      os.path.basename(sentry.path)),
                  reverse=True)

    # the oldest directory is at the end of the list
    while len(sentries) > gen_num:
        remove_files_recursively(sentries.pop().path)

    return

##
# Bring dest in line with src by letting rsync transfer the entries marked
# MODIFY or CREATE; each transferred path is written as progress.
#
# All marks are checked before rsync is started, so invalid input never
# leaves a running command behind.  The path list is fed and the output is
# drained with communicate(): writing the list while nobody reads the
# output pipes, or waiting for the exit before reading them, could block
# forever.
# @param src str
# @param dest str
# @param mentries list<MEntry>
# @return None
# @throws GMIError when an entry has an unknown mark
# @throws ECError when rsync cannot be started, cannot be fed, or exits
#                 with a non-zero code
def sync_directory(src, dest, mentries):
    src_arg = os.path.join(src, '') #ends with path separator

    paths = []
    for mentry in mentries:
        if mentry.mark == mentry.MODIFY or \
           mentry.mark == mentry.CREATE:
            paths.append(mentry.path)
        elif mentry.mark == mentry.DELETE: pass
        elif mentry.mark == mentry.IGNORE: pass
        else:
            raise GMIError('Unknown type mark ' + str(mentry.mark) +
                             ': ' + mentry.path)

    try:
        pipe = subprocess.Popen(['rsync',
                                 '-rptgov',
                                 '--files-from=-',
                                 src_arg,
                                 dest],
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    except OSError:
        raise ECError('Command execution failed')

    for path in paths:
        write(path + "\n")

    try:
        # one path per line on the standard input of rsync
        (out, err) = pipe.communicate(''.join([path + "\n"
                                               for path in paths]))
    except (IOError, OSError):
        raise ECError('Writing to a pipe failed')

    if pipe.returncode != 0:
        msg = 'Command failed with exit code ' + str(pipe.returncode) + ":\n"
        msg += ''.join(['  ' + line for line in err.splitlines(True)])
        raise ECError(msg)

    return

##
# Run one backup from the command line arguments.
#
# Steps: parse the arguments, prepare the 'current' directory under the
# destination, let rsync report what differs, store what is about to be
# modified or deleted in a new older generation directory, rotate the older
# generation directories, and finally synchronize 'current' with the source.
# @param argv list<str> command line including the program name
# @return int 0 on success and after printing usage or version, 1 on error
def main(argv):
    # parse command line arguments
    if len(argv) < 2:
        print_usage()
        return 0

    opts = None
    args = None
    try:
        (opts, args) = getopt.getopt(argv[1:], 'g:', ['help', 'version'])
    except getopt.GetoptError:
        print_error('Could not analyze arguments')
        traceback.print_exc()
        return 1

    gen_num = 1

    for (name, value) in opts:
        if name == '-g':
            num = 0
            try:
                num = int(value, 10)
            except ValueError:
                print_error('Option -g only allow positive integer')
                traceback.print_exc()
                return 1
            if num < 1:
                print_error('Option -g only allow positive integer')
                return 1
            gen_num = num
        elif name == '--help':
            print_usage()
            return 0
        elif name == '--version':
            print_version()
            return 0

    if len(args) != 2:
        print_usage()
        return 0

    src = args[0]
    src_st = None
    dest = args[1]
    dest_st = None

    # a failing stat leaves None behind, which is reported just below
    try:
        src_st = os.stat(src)
        dest_st = os.stat(dest)
    except OSError:
        pass

    if not src_st or not stat.S_ISDIR(src_st.st_mode):
        print_error('Source must be a directory: ' + src)
        return 1
    if not dest_st or not stat.S_ISDIR(dest_st.st_mode):
        print_error('Destination must be a directory: ' + dest)
        return 1

    write("""
Source: %s
Destination: %s
Number of older generations: %d

""" % (src, dest, gen_num))

    # create a directory to store current generation
    dest_cur = os.path.join(dest, 'current')
    dest_cur_st = None

    write('Preparing a directory to store current generation ... ')
    try:
        dest_cur_st = make_directory_and_get_stat(dest_cur)
    except FSError:
        write("\n")
        print_error('Could not create a directory '
                      'to store current generation: ' + dest_cur)
        traceback.print_exc()
        return 1
    write("Done\n")

    # make dest_cur to always be updated, to detect deleting files
    if src_st.st_mtime <= dest_cur_st.st_mtime:
        write('Setting the current generation directory expired ... ')
        # one second older than the source, whatever type the mtime has
        expired = src_st.st_mtime - 1
        try:
            os.utime(dest_cur, (expired, expired))
        except OSError:
            write("\n")
            print_error('Could not set stat info of directory: ' + dest_cur)
            traceback.print_exc()
            return 1
        write("Done\n")

    # check different entries between src and dest_cur
    mentries = None

    write("Checking different entries ... \n")
    try:
        mentries = check_different_entries(src, dest_cur)
    except ECError:
        print_error('Could not check different entries')
        traceback.print_exc()
        return 1

    # sort mentries in deep order (children before their parent directory)
    mentries.sort(reverse=True)

    #update mark of entries,
    #and cache stat info of files and parent directories
    stat_cache = None

    try:
        stat_cache = \
          update_mark_and_create_stat_cache(mentries, src, dest_cur)
    except GMIError: #FSError, or GMIError itself for an unknown mark
        print_error('Could not create a stat cache')
        traceback.print_exc()
        return 1
    stat_cache['.'] = dest_cur_st

    # count entries that will be removed
    mcount = 0
    for mentry in mentries:
        if mentry.mark == mentry.MODIFY: mcount += 1
        elif mentry.mark == mentry.CREATE: pass
        elif mentry.mark == mentry.DELETE: mcount += 1
        elif mentry.mark == mentry.IGNORE: pass
        else:
            print_error('Unknown type mark ' + str(mentry.mark) +
                          ': ' + mentry.path)
            return 1

    # create a directory to store older generation, and
    # store entries to the directory
    # NOTE(review): the entry '.' is always marked MODIFY, which is
    # presumably why a count of one does not start a new generation
    if mcount > 1:
        dest_old = None

        write('Preparing a directory to store older generation ... ')
        try:
            dest_old = make_older_generation_directory(dest)
        except FSError:
            write("\n")
            print_error('Could not create a directory '
                          'to store older generation')
            traceback.print_exc()
            return 1
        write("Done\n")

        write('Storing entries to the older generation directory ... ')
        try:
            store_entries(dest_cur, dest_old, mentries, stat_cache)
        except GMIError: #FSError, or GMIError for an unsupported file type
            write("\n")
            print_error('Could not store entries to the directory '
                          'to store older generation')
            traceback.print_exc()
            return 1
        write("Done\n")

    # rotate older generation directories
    write('Rotating older generation directories ... ')
    try:
        rotate_older_generation_directories(dest, gen_num)
    except FSError:
        write("\n")
        print_error('Could not rotate older generation directories')
        traceback.print_exc()
        return 1
    write("Done\n")

    # actually sync the current generation directory with the source directory
    write("Syncronizing the directory ... \n")
    try:
        sync_directory(src, dest_cur, mentries)
    except GMIError: #ECError, or GMIError itself for an unknown mark
        print_error('Syncronizing the directory failed')
        traceback.print_exc()
        return 1

    write("\n")

    return 0

# execute: run main() when started as a script and hand its return value
# to the caller as the exit status
if __name__ == '__main__':
    exit_status = main(sys.argv)
    sys.exit(exit_status)

